{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import sys\n",
    "import os\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Parameters Updating"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SGD:\n",
    "    def __init__(self, lr=0.01):\n",
    "        self.lr = lr\n",
    "    \n",
    "    def update(self, params, grads):\n",
    "        for key in params.keys():\n",
    "            params[key] -= self.lr * grads[key]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Momentum:\n",
    "    def __init__(self, lr=0.01, momentum=0.9):\n",
    "        self.lr = lr\n",
    "        self.momentum = momentum\n",
    "        self.v = None\n",
    "    \n",
    "    def update(self, params, grads):\n",
    "        if self.v is None:\n",
    "            self.v = {}\n",
    "            for key, val in params.items():\n",
    "                self.v[key] = np.zeros_like(val)\n",
    "    \n",
    "        for key in params.keys():\n",
    "            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]\n",
    "            params[key] += self.v[key]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class AdaGrad:\n",
    "    def __init__(self, lr=0.01):\n",
    "        self.lr = lr\n",
    "        self.h = None\n",
    "    \n",
    "    def update(self, params, grads):\n",
    "        if self.h is None:\n",
    "            self.h = {}\n",
    "            for key, val in params.items():\n",
    "                self.h[key] = np.zeros_like(val)\n",
    "        \n",
    "        for key in params.keys():\n",
    "            self.h[key] += grads[key] * grads[key]\n",
    "            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!cat deep_learning_from_scratch/ch06/optimizer_compare_naive.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!cd deep_learning_from_scratch/ch06 && python3 optimizer_compare_naive.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!cd deep_learning_from_scratch/ch06 && python3 weight_init_activation_histogram.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Initial Weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sigmoid(x):\n",
    "    return 1 / (1 + np.exp(-x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sigmoid_alpha(x):\n",
    "    return 2 / (1 + np.exp(-x)) - 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def tanh(x):\n",
    "    return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def relu(x):\n",
    "    return np.maximum(x, 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.random.randn(1000, 100)\n",
    "node_num = 100\n",
    "hidden_layer_size = 5\n",
    "activations = {}\n",
    "\n",
    "for i in range(hidden_layer_size):\n",
    "    if i != 0:\n",
    "        x = activations[i - 1]\n",
    "    \n",
    "    # w = np.random.randn(node_num, node_num) * 1\n",
    "    # w = np.random.randn(node_num, node_num) * 0.01\n",
    "    # w = np.random.randn(node_num, node_num) / np.sqrt(node_num)\n",
    "    w = np.random.randn(node_num, node_num) * (np.sqrt(2 / node_num))\n",
    "    \n",
    "    z = np.dot(x, w)\n",
    "    a = relu(z)\n",
    "    activations[i] = a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(12, 4))\n",
    "for i, a in activations.items():\n",
    "    plt.subplot(1, len(activations), i + 1)\n",
    "    plt.title(f\"{i + 1}-layer\")\n",
    "    plt.hist(a.flatten(), 30, range=(0, 1))\n",
    "    # plt.ylim(0, 40000)\n",
    "    if i != 0: plt.tick_params(labelleft=False)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!cd deep_learning_from_scratch/ch06 && python3 weight_init_compare.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Batch Normalization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!cd deep_learning_from_scratch/ch06 && python3 batch_norm_test.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Regularization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from deep_learning_from_scratch.dataset.mnist import load_mnist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from deep_learning_from_scratch.common.functions import *\n",
    "from deep_learning_from_scratch.common.util import im2col, col2im\n",
    "with open(\"deep_learning_from_scratch/common/layers.py\") as f:\n",
    "    content = f.read()\n",
    "print(re.findall(r\".*common.*\", content))\n",
    "exec(re.sub(r\".*common.*\", \"\", content))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from deep_learning_from_scratch.common.gradient import numerical_gradient\n",
    "with open('deep_learning_from_scratch/common/multi_layer_net.py', 'r') as f:\n",
    "    content = f.read()\n",
    "print(re.findall(r\".*common.*\", content))\n",
    "exec(re.sub(r\".*common.*\", \"\", content))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Weight decay"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)\n",
    "# 過学習を再現するために、学習データを削減\n",
    "x_train = x_train[:300]\n",
    "t_train = t_train[:300]\n",
    "\n",
    "network = MultiLayerNet(input_size=784, hidden_size_list=[100]*6, output_size=10)\n",
    "optimizer = SGD(lr=0.01)\n",
    "\n",
    "max_epochs = 201\n",
    "train_size = x_train.shape[0]\n",
    "batch_size = 100\n",
    "\n",
    "train_loss_list = []\n",
    "train_acc_list = []\n",
    "test_acc_list = []\n",
    "\n",
    "iter_per_epoch = max(train_size / batch_size, 1)\n",
    "epoch_cnt = 0\n",
    "\n",
    "for i in range(int(1e+9)):\n",
    "    batch_mask = np.random.choice(train_size, batch_size)\n",
    "    x_batch = x_train[batch_mask]\n",
    "    t_batch = t_train[batch_mask]\n",
    "    \n",
    "    grads = network.gradient(x_batch, t_batch)\n",
    "    optimizer.update(network.params, grads)\n",
    "    \n",
    "    if i % iter_per_epoch == 0:\n",
    "        train_acc = network.accuracy(x_train, t_train)\n",
    "        test_acc = network.accuracy(x_test, t_test)\n",
    "        train_acc_list.append(train_acc)\n",
    "        test_acc_list.append(test_acc)\n",
    "        \n",
    "        epoch_cnt += 1\n",
    "        if epoch_cnt >= max_epochs:\n",
    "            break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "markers = {'train': 'o', 'test': 's'}\n",
    "plt.plot(range(1, 202), train_acc_list, marker='o', label='train', markevery=10)\n",
    "plt.plot(range(1, 202), test_acc_list, marker='s', label='test', markevery=10)\n",
    "plt.xlabel(\"epochs\")\n",
    "plt.ylabel(\"accuracy\")\n",
    "plt.xlim(0, 201)\n",
    "plt.ylim(0, 1)\n",
    "plt.legend(loc='lower right')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!cd deep_learning_from_scratch/ch06 && python3 overfit_weight_decay.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Dropout"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Dropout:\n",
    "    def __init__(self, dropout_ratio=0.5):\n",
    "        self.dropout_ratio = dropout_ratio\n",
    "        self.mask = None\n",
    "    \n",
    "    def forward(self, x, train_flg=True):\n",
    "        if train_flg:\n",
    "            self.mask = np.random.rand(*x.shape) > self.dropout_ratio\n",
    "            return x * self.mask\n",
    "        else:\n",
    "            return x * (1.0 - self.dropout_ratio)\n",
    "    \n",
    "    def backward(self, dout):\n",
    "        return dout * self.mask"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!cd deep_learning_from_scratch/ch06 && python3 overfit_dropout.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Hyper Parameter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from deep_learning_from_scratch.common.util import shuffle_dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(x_train, t_train), (x_test, t_test) = load_mnist()\n",
    "\n",
    "# 訓練データをシャッフル\n",
    "x_train, t_train = shuffle_dataset(x_train, t_train)\n",
    "\n",
    "# 検証データの分割\n",
    "validation_rate = 0.20\n",
    "validation_num = int(x_train.shape[0] * validation_rate)\n",
    "\n",
    "x_val = x_train[:validation_num]\n",
    "t_val = t_train[:validation_num]\n",
    "x_train = x_train[validation_num:]\n",
    "t_train = t_train[validation_num:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "weight_decay = 10 ** np.random.uniform(-8, 4)\n",
    "lr = 10 ** np.random.uniform(-6, -2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!cd deep_learning_from_scratch/ch06 && python3 hyperparameter_optimization.py"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
